# Download data
url <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE137001&format=file&file=GSE137001%5FTPM%5Freprogramming%5Fintermediates%2Exlsx"
filename <- "GSE137001_TPM_reprogramming_intermediates.xlsx"
if (!file.exists(filename)) {
  download.file(url, filename)
}
tpms <- read_excel(filename)
tpms_mat <- tpms %>% select(3:41) %>% as.matrix()

metadata <- data.frame(sample_name = colnames(tpms_mat)) %>%
  mutate(group = gsub("(d\\d+_|_R\\d)", "", sample_name)) %>%
  mutate(days = case_when(
    startsWith(sample_name, "d") ~ sub("d(\\d+).+", "\\1", sample_name),
    TRUE ~ "NA"
  )) %>%
  mutate(rep = gsub(".+R(\\d)", "\\1", sample_name)) %>%
  column_to_rownames("sample_name")

PCA

# I don't think this transformation is correct
trans_mat <- log10(1 + (tpms_mat %>% t()))

pca_res <- stats::prcomp(trans_mat)

pca_df <- pca_res$x %>%
  as.data.frame() %>%
  rownames_to_column("sample_name") %>%
  mutate(group = gsub("(d\\d+_|_R\\d)", "", sample_name)) %>%
  mutate(days = case_when(
    startsWith(sample_name, "d") ~ sub("d(\\d+).+", "\\1", sample_name),
    TRUE ~ "NA"
  ))

pca_df %>%
  filter(group %in% c("SKM", "OSKM")) %>%
  ggplot(aes(
    x = PC1, y = PC2, color = days, shape = group
  )) + 
  geom_point(size = 3)

# Get gene list to filter
stress_pathways <- c("GO:0036500")  # Just the ATF6 pathway
# stress_pathways <- c("GO:0009267", "GO:0034198", "GO:0036003", "GO:0036500", "GO:1990928", "GO:0034599", "GO:0034976", "GO:0016236", "GO:0010506")

gostres <- gprofiler2::gost(stress_pathways)
stress_ensg <- gostres$meta$genes_metadata$query$query_1$ensgs

ens2sym <- AnnotationDbi::select(
  EnsDb.Hsapiens.v86,
  keys = keys(EnsDb.Hsapiens.v86),
  columns = c("SYMBOL")
)

stress_genes <- ens2sym %>%
  filter(GENEID %in% stress_ensg) %>%
  pull(SYMBOL)
# Filter data for selected stress genes
tpms$Name <- unlist(lapply(tpms$Name, toupper))  # Capitalize gene names
cat("% stress genes in dataset:", 100*mean(stress_genes %in% tpms$Name))
% stress genes in dataset: 100
stress_tpms <- tpms %>% filter(Name %in% stress_genes)
# Select only the SKM and OSKM samples
# Scale the TPMs of each gene to be between 0 and 1
# Note: I couldn't figure out how to scale row-wise, so I transposed instead
scaled_tpms <- stress_tpms[3:41] %>%
  select(contains("SKM_R")) %>%
  t() %>%
  as.data.frame() %>%
  mutate_all(scales::rescale) %>%  # Min-max scaling to be between 0 and 1
  t() %>%
  as_tibble() %>%
  mutate(Gene = stress_tpms$Name) %>%
  relocate(Gene)
# Format data from wide to long for plotting
df <- scaled_tpms %>%
  pivot_longer(
    contains("_"),
    names_to = c("Day", "Factors", "Rep"),
    names_pattern = "d(\\d+)_(\\w+)_R(\\d+)"
  ) %>%
  rename(scaled_TPM = value) %>%
  mutate_if(is.character, as.factor)

Boxplot

df %>%
  ggplot(aes(x = Day, y = scaled_TPM)) +
  stat_boxplot(geom ='errorbar', width = 0.5) +
  geom_boxplot() +
  facet_wrap(~Factors) + 
  geom_jitter(color = "blue", width = 0.05)

Lineplot

ggp <- df %>%
  ggplot(aes(x = Day, y = scaled_TPM, group = Gene)) +
  geom_line(aes(color = Gene)) +
  facet_wrap(~Factors + Rep)

ggplotly(ggp)
res <- aov(scaled_TPM ~ Day, data = df %>% filter(Factors == "OSKM"))
summary(res)
            Df Sum Sq Mean Sq F value   Pr(>F)    
Day          3  1.946  0.6487   7.727 0.000143 ***
Residuals   76  6.380  0.0840                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
res <- aov(scaled_TPM ~ Day, data = df %>% filter(Factors == "SKM"))
summary(res)
            Df Sum Sq Mean Sq F value   Pr(>F)    
Day          3  1.369  0.4562   6.295 0.000715 ***
Residuals   76  5.507  0.0725                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
LS0tCnRpdGxlOiAiQXZlcmFnZSBUUE0gQWNyb3NzIFRpbWUgb2YgU2VsZWN0IEdlbmVzIChHU0UxMzcwMDEpIgphdXRob3I6ICJKYW1lcyBEYW8iCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIGNvZGVfZm9sZGluZzogaGlkZQogICAgdGhlbWU6IGNlcnVsZWFuCi0tLQoKYGBge3IgaW5jbHVkZT1GQUxTRX0KbGlicmFyeShyZWFkeGwpCmxpYnJhcnkoRW5zRGIuSHNhcGllbnMudjg2KQpsaWJyYXJ5KERFU2VxMikKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkocGxvdGx5KQpgYGAKCgpgYGB7cn0KIyBEb3dubG9hZCBkYXRhCnVybCA8LSAiaHR0cHM6Ly93d3cubmNiaS5ubG0ubmloLmdvdi9nZW8vZG93bmxvYWQvP2FjYz1HU0UxMzcwMDEmZm9ybWF0PWZpbGUmZmlsZT1HU0UxMzcwMDElNUZUUE0lNUZyZXByb2dyYW1taW5nJTVGaW50ZXJtZWRpYXRlcyUyRXhsc3giCmZpbGVuYW1lIDwtICJHU0UxMzcwMDFfVFBNX3JlcHJvZ3JhbW1pbmdfaW50ZXJtZWRpYXRlcy54bHN4IgppZiAoIWZpbGUuZXhpc3RzKGZpbGVuYW1lKSkgewogIGRvd25sb2FkLmZpbGUodXJsLCBmaWxlbmFtZSkKfQp0cG1zIDwtIHJlYWRfZXhjZWwoZmlsZW5hbWUpCmBgYAoKCmBgYHtyfQp0cG1zX21hdCA8LSB0cG1zICU+JSBzZWxlY3QoMzo0MSkgJT4lIGFzLm1hdHJpeCgpCgptZXRhZGF0YSA8LSBkYXRhLmZyYW1lKHNhbXBsZV9uYW1lID0gY29sbmFtZXModHBtc19tYXQpKSAlPiUKICBtdXRhdGUoZ3JvdXAgPSBnc3ViKCIoZFxcZCtffF9SXFxkKSIsICIiLCBzYW1wbGVfbmFtZSkpICU+JQogIG11dGF0ZShkYXlzID0gY2FzZV93aGVuKAogICAgc3RhcnRzV2l0aChzYW1wbGVfbmFtZSwgImQiKSB+IHN1YigiZChcXGQrKS4rIiwgIlxcMSIsIHNhbXBsZV9uYW1lKSwKICAgIFRSVUUgfiAiTkEiCiAgKSkgJT4lCiAgbXV0YXRlKHJlcCA9IGdzdWIoIi4rUihcXGQpIiwgIlxcMSIsIHNhbXBsZV9uYW1lKSkgJT4lCiAgY29sdW1uX3RvX3Jvd25hbWVzKCJzYW1wbGVfbmFtZSIpCmBgYAoKIyBQQ0EKCmBgYHtyfQojIEkgZG9uJ3QgdGhpbmsgdGhpcyB0cmFuc2Zvcm1hdGlvbiBpcyBjb3JyZWN0CnRyYW5zX21hdCA8LSBsb2cxMCgxICsgKHRwbXNfbWF0ICU+JSB0KCkpKQoKcGNhX3JlcyA8LSBzdGF0czo6cHJjb21wKHRyYW5zX21hdCkKCnBjYV9kZiA8LSBwY2FfcmVzJHggJT4lCiAgYXMuZGF0YS5mcmFtZSgpICU+JQogIHJvd25hbWVzX3RvX2NvbHVtbigic2FtcGxlX25hbWUiKSAlPiUKICBtdXRhdGUoZ3JvdXAgPSBnc3ViKCIoZFxcZCtffF9SXFxkKSIsICIiLCBzYW1wbGVfbmFtZSkpICU+JQogIG11dGF0ZShkYXlzID0gY2FzZV93aGVuKAogICAgc3RhcnRzV2l0aChzYW1wbGVfbmFtZSwgImQiKSB+IHN1YigiZChcXGQrKS4rIiwgIlxcMSIsIHNhbXBsZV9uYW1lKSwKICAgIFRSVUUgfiAiTkEiCiAgKSkKCnBjYV9kZiAlPiUKICBmaWx0ZXIoZ3JvdXAgJWluJSBjKCJTS00iLCAiT1NLTSIpKSAlPiUKICBnZ3Bsb3QoYWVzKAogICAgeCA9IFBDMSwgeSA9IFBDMiwgY29sb3IgPSBkYXlzLCBzaGFwZSA9IGdyb3VwCiAgKSkgKyAKICBnZW9tX3BvaW50KHNpemUgPSAzKQpgYGAKCgoKYGBge3J9CiMgR2V0IGdlbmUgbGlzdCB0byBmaWx0ZXIKc3RyZXNzX3BhdGh3YXlzIDwtIGMoIkdPOjAwMzY1MDAiKSAgIyBKdXN0IHRoZSBBVEY2IHBhdGh3YXkKIyBzdHJlc3NfcGF0aHdheXMgPC0gYygiR086MDAwOTI2NyIsICJHTzowMDM0MTk4IiwgIkdPOjAwMzYwMDMiLCAiR086MDAzNjUwMCIsICJHTzoxOTkwOTI4IiwgIkdPOjAwMzQ1OTkiLCAiR086MDAzNDk3NiIsICJHTzowMDE2MjM2IiwgIkdPOjAwMTA1MDYiKQoKZ29zdHJlcyA8LSBncHJvZmlsZXIyOjpnb3N0KHN0cmVzc19wYXRod2F5cykKc3RyZXNzX2Vuc2cgPC0gZ29zdHJlcyRtZXRhJGdlbmVzX21ldGFkYXRhJHF1ZXJ5JHF1ZXJ5XzEkZW5zZ3MKCmVuczJzeW0gPC0gQW5ub3RhdGlvbkRiaTo6c2VsZWN0KAogIEVuc0RiLkhzYXBpZW5zLnY4NiwKICBrZXlzID0ga2V5cyhFbnNEYi5Ic2FwaWVucy52ODYpLAogIGNvbHVtbnMgPSBjKCJTWU1CT0wiKQopCgpzdHJlc3NfZ2VuZXMgPC0gZW5zMnN5bSAlPiUKICBmaWx0ZXIoR0VORUlEICVpbiUgc3RyZXNzX2Vuc2cpICU+JQogIHB1bGwoU1lNQk9MKQpgYGAKCgpgYGB7cn0KIyBGaWx0ZXIgZGF0YSBmb3Igc2VsZWN0ZWQgc3RyZXNzIGdlbmVzCnRwbXMkTmFtZSA8LSB1bmxpc3QobGFwcGx5KHRwbXMkTmFtZSwgdG91cHBlcikpICAjIENhcGl0YWxpemUgZ2VuZSBuYW1lcwpjYXQoIiUgc3RyZXNzIGdlbmVzIGluIGRhdGFzZXQ6IiwgMTAwKm1lYW4oc3RyZXNzX2dlbmVzICVpbiUgdHBtcyROYW1lKSkKCnN0cmVzc190cG1zIDwtIHRwbXMgJT4lIGZpbHRlcihOYW1lICVpbiUgc3RyZXNzX2dlbmVzKQpgYGAKCgpgYGB7cn0KIyBTZWxlY3Qgb25seSB0aGUgU0tNIGFuZCBPU0tNIHNhbXBsZXMKIyBTY2FsZSB0aGUgVFBNcyBvZiBlYWNoIGdlbmUgdG8gYmUgYmV0d2VlbiAwIGFuZCAxCiMgTm90ZTogSSBjb3VsZG4ndCBmaWd1cmUgb3V0IGhvdyB0byBzY2FsZSByb3ctd2lzZSwgc28gSSB0cmFuc3Bvc2VkIGluc3RlYWQKc2NhbGVkX3RwbXMgPC0gc3RyZXNzX3RwbXNbMzo0MV0gJT4lCiAgc2VsZWN0KGNvbnRhaW5zKCJTS01fUiIpKSAlPiUKICB0KCkgJT4lCiAgYXMuZGF0YS5mcmFtZSgpICU+JQogIG11dGF0ZV9hbGwoc2NhbGVzOjpyZXNjYWxlKSAlPiUgICMgTWluLW1heCBzY2FsaW5nIHRvIGJlIGJldHdlZW4gMCBhbmQgMQogIHQoKSAlPiUKICBhc190aWJibGUoKSAlPiUKICBtdXRhdGUoR2VuZSA9IHN0cmVzc190cG1zJE5hbWUpICU+JQogIHJlbG9jYXRlKEdlbmUpCmBgYAoKCmBgYHtyfQojIEZvcm1hdCBkYXRhIGZyb20gd2lkZSB0byBsb25nIGZvciBwbG90dGluZwpkZiA8LSBzY2FsZWRfdHBtcyAlPiUKICBwaXZvdF9sb25nZXIoCiAgICBjb250YWlucygiXyIpLAogICAgbmFtZXNfdG8gPSBjKCJEYXkiLCAiRmFjdG9ycyIsICJSZXAiKSwKICAgIG5hbWVzX3BhdHRlcm4gPSAiZChcXGQrKV8oXFx3KylfUihcXGQrKSIKICApICU+JQogIHJlbmFtZShzY2FsZWRfVFBNID0gdmFsdWUpICU+JQogIG11dGF0ZV9pZihpcy5jaGFyYWN0ZXIsIGFzLmZhY3RvcikKYGBgCgojIEJveHBsb3QKCmBgYHtyfQpkZiAlPiUKICBnZ3Bsb3QoYWVzKHggPSBEYXksIHkgPSBzY2FsZWRfVFBNKSkgKwogIHN0YXRfYm94cGxvdChnZW9tID0nZXJyb3JiYXInLCB3aWR0aCA9IDAuNSkgKwogIGdlb21fYm94cGxvdCgpICsKICBmYWNldF93cmFwKH5GYWN0b3JzKSArIAogIGdlb21faml0dGVyKGNvbG9yID0gImJsdWUiLCB3aWR0aCA9IDAuMDUpCmBgYAoKIyBMaW5lcGxvdAoKYGBge3IgZmlnLndpZHRoPTh9CmdncCA8LSBkZiAlPiUKICBnZ3Bsb3QoYWVzKHggPSBEYXksIHkgPSBzY2FsZWRfVFBNLCBncm91cCA9IEdlbmUpKSArCiAgZ2VvbV9saW5lKGFlcyhjb2xvciA9IEdlbmUpKSArCiAgZmFjZXRfd3JhcCh+RmFjdG9ycyArIFJlcCkKCmdncGxvdGx5KGdncCkKYGBgCgoKYGBge3J9CnJlcyA8LSBhb3Yoc2NhbGVkX1RQTSB+IERheSwgZGF0YSA9IGRmICU+JSBmaWx0ZXIoRmFjdG9ycyA9PSAiT1NLTSIpKQpzdW1tYXJ5KHJlcykKYGBgCgpgYGB7cn0KcmVzIDwtIGFvdihzY2FsZWRfVFBNIH4gRGF5LCBkYXRhID0gZGYgJT4lIGZpbHRlcihGYWN0b3JzID09ICJTS00iKSkKc3VtbWFyeShyZXMpCmBgYAoKCgoKCgoKCg==